// Load the de-identified S6 mock-exam file.
use "$raw/all_s6_mock_noPII.dta", clear

// Principal-level subject columns. They are compared against letter strings
// below and converted in place to a numeric scale:
//   A=6, B=5, C=4, D=3, E=2, O=1, F=0.
// The local macro defined here is reused later in the file, so its name and
// contents are kept as they were.
local principal HIST ECON GEOG ENT IRE AGRIC LUG CRE PHY MTC BIO CHEM ART LIT KISW ARB TD
foreach x of local principal {
    // Strip leading/trailing blanks before mapping. Previously only "E " was
    // special-cased, so any other padded grade (e.g. "A ") matched nothing
    // and was silently turned into missing by destring, force.
    replace `x' = trim(`x')

    replace `x' = "6" if `x' == "A"
    replace `x' = "5" if `x' == "B"
    replace `x' = "4" if `x' == "C"
    replace `x' = "3" if `x' == "D"
    replace `x' = "2" if `x' == "E"
    replace `x' = "1" if `x' == "O"
    replace `x' = "0" if `x' == "F"

    // force: any remaining non-numeric text (including empty strings)
    // becomes numeric missing.
    destring `x', replace force
}

// Subsidiary papers are recoded in place so that the scale is reversed:
// 1 becomes 8, 2 becomes 7, ... 9 becomes 0.
// This is done in two passes: each grade g in 1..9 is first parked at 19-g
// (10..18, outside the 1..9 range being tested), then 10 is subtracted from
// every value. Values that are not exactly 1..9 are only shifted by -10.
// The local macro defined here is reused later in the file.
local subsidiary SMA CST GEP
foreach x of local subsidiary {
    forvalues grade = 1/9 {
        local parked = 19 - `grade'
        replace `x' = `parked' if `x' == `grade'
    }
    replace `x' = `x' - 10
}



// Count of subsidiary papers with a recoded score of 3 or more.
// NOTE(review): the start value 0 is only set when GEP is non-missing, the
// SMA line overwrites with 1 regardless of GEP, and the CST/GEP lines add 1
// to whatever is there (missing stays missing). Rows with GEP missing are
// therefore treated differently for SMA and CST - confirm this is intended.
gen sub_point=0 if GEP!=.
replace sub_point=1 if SMA>=3 & SMA!=.
replace sub_point=sub_point+1 if CST>=3 & CST!=.
replace sub_point=sub_point+1 if GEP>=3 & GEP!=.
label variable sub_point "0-2 if passed subsidiary exams"

// Row sum of SMA and CST; rowtotal without the missing option treats missing
// as 0, so this is 0 (not missing) when both are missing.
egen subsidiary=rowtotal(SMA CST)
la var subsidiary "SMA or CST score"

// Adding GEP with + means the result is missing whenever GEP is missing.
gen subsidiary_gep=subsidiary+GEP
// Label typo fixed ("subsidary" -> "subsidiary"); this variable is kept in
// the saved file under the name subsidiary_gep_mock.
la var subsidiary_gep "subsidiary and general paper score"

// Row sum of all principal subject scores (missing counted as 0).
egen principal=rowtotal(`principal')
la var principal "Principal paper score"

// Standardise every principal subject: subtract the whole-sample mean and
// divide by the whole-sample standard deviation. For each subject this
// creates mean_<subject>2, sd_<subject>2 and std_<subject>.
// egen mean()/sd() without by already fill every observation with the same
// constant, so no intermediate variable is needed.
local std_vars
foreach x of local principal {
    egen mean_`x'2 = mean(`x')
    la var mean_`x'2 "Mean of `x'"
    egen sd_`x'2 = sd(`x')
    la var sd_`x'2 "Sd of `x' "
    gen std_`x' = (`x' - mean_`x'2) / sd_`x'2
    la var std_`x'  "Standardised score in `x'"
    // Remember exactly which variables were created in this loop.
    local std_vars `std_vars' std_`x'
}

// Sum only the standardised principal scores created above. The previous
// std_* wildcard would also pick up any other variable whose name starts
// with std_ (already in the data, or created earlier if statements move).
// With the missing option the total is missing when every input is missing.
egen std_principal=rowtotal(`std_vars'), missing
// NOTE(review): fixed divisor of 3 regardless of how many subjects are
// non-missing - presumably each candidate sits three principal subjects; confirm.
replace std_principal=std_principal/3
la var std_principal "Average of standardised principal scores"

// Standardise sub_point using its whole-sample mean and standard deviation.
// egen mean()/sd() without by already produce a constant in every
// observation, so the former max() round trip through a temporary variable
// is not needed.
egen mean_sub_point2 = mean(sub_point)
la var mean_sub_point2 "Mean of sub point"
egen sd_sub_point2 = sd(sub_point)
la var sd_sub_point2 "Sd of sub point"
gen std_sub_point = (sub_point - mean_sub_point2) / sd_sub_point2
// Label typo fixed ("Standarised" -> "Standardised").
la var std_sub_point "Standardised subsidiary point (0-2)"

// Standardise each subsidiary paper: subtract the whole-sample mean and
// divide by the whole-sample standard deviation. For each paper this creates
// mean_<paper>2, sd_<paper>2 and std_<paper>.
foreach x of local subsidiary {
    egen mean_`x'2 = mean(`x')
    la var mean_`x'2 "Mean of `x'"
    egen sd_`x'2 = sd(`x')
    la var sd_`x'2 "Sd of `x'"
    gen std_`x' = (`x' - mean_`x'2) / sd_`x'2
    // Missing space before the paper name added to the label.
    la var std_`x' "Standardised score in `x'"
}


// Row sum of the three standardised papers (missing only when all three are
// missing), then divided by 2.
// NOTE(review): three inputs but a divisor of 2 - presumably each candidate
// has GEP plus one of SMA/CST; confirm.
egen std_subsidiary=rowtotal(std_SMA std_CST std_GEP), missing
replace std_subsidiary=std_subsidiary/2
la var std_subsidiary "Average of standardised subsidiary score"

// Re-standardise the aggregate scores: for each one, subtract its
// whole-sample mean and divide by its whole-sample standard deviation.
// Creates mean_<var>2, sd_<var>2 and <var>2_mock for each aggregate.
foreach agg of varlist std_principal std_subsidiary POINTS {
    egen mean_`agg'2 = mean(`agg')
    label variable mean_`agg'2 "Mean of `agg'"

    egen sd_`agg'2 = sd(`agg')
    label variable sd_`agg'2 "Sd of `agg'"

    generate `agg'2_mock = (`agg' - mean_`agg'2) / sd_`agg'2

    // New label = "Re-normalised " + lower-cased label of the source variable.
    local lbl : variable label `agg'
    local lbl = "Re-normalised " + lower("`lbl'")
    label variable `agg'2_mock "`lbl'"
}

// Quick look at the re-standardised points score.
summarize POINTS2_mock

// Replace the automatically built label on the standardised points score.
label variable POINTS2_mock "standardised raw points score"

// Tag the raw scores that are kept with a _mock suffix.
foreach v in POINTS subsidiary_gep GEP SMA CST {
    rename `v' `v'_mock
}

// Keep only the identifiers and the mock-exam outcome variables, then save.
keep SCHOOL id ///
    POINTS_mock POINTS2_mock ///
    GEP_mock SMA_mock CST_mock ///
    subsidiary_gep_mock ///
    std_principal2_mock std_subsidiary2_mock
save "$cleaned/all_s6_mock_cleaned.dta", replace
